# %%
import pandas as pd
import numpy as np
import start
from openai import OpenAI
from tqdm import tqdm

# Load every sheet of the prompt workbook; sheet_name=None makes read_excel
# return a dict that maps each sheet name to its DataFrame.
prompts_file = start.MAIN_DIR + "prompts_characterization.xlsx"
prompts = pd.read_excel(prompts_file, sheet_name=None)

# Turn the selected sheet into a list of chat messages, one
# {"role": ..., "content": ...} dict per row, in sheet order.
SHEET_NAME = "Few Shot 3a"
sheet_data = prompts[SHEET_NAME]
prompt = [
    {
        "role": sheet_data.loc[row_label, "role"],
        "content": sheet_data.loc[row_label, "content"],
    }
    for row_label in sheet_data.index
]
# %%

# Create the OpenAI client with the API key kept in the local `start` module.
client = OpenAI(api_key=start.OPENAI_API_KEY)
# %%

# Read the tweets workbook and keep only the rows that have a text value.
tweets_path = start.MAIN_DIR + "data/clean/relevant_tweets.xlsx"
df = pd.read_excel(tweets_path)
df = df[df["text"].notna()]
# %%

# Load the classifications saved by earlier runs. On the very first run the
# results workbook does not exist yet, so fall back to an empty frame with the
# same columns as `df`; every tweet is then treated as not yet classified.
classifications_path = (
    start.MAIN_DIR + "data/clean/gpt_classifications_characters.xlsx"
)
try:
    previous_classifications = pd.read_excel(classifications_path)
except FileNotFoundError:
    previous_classifications = df.head(0)

# Keep only the tweets whose unique_id is absent from the earlier results.
not_classified = df[~df.unique_id.isin(previous_classifications.unique_id)]
# %%
# to_classify = not_classified.sample(NUMBER_TO_CLASSIFY, random_state=28)

to_classify = not_classified
# %%
MODEL = "gpt-4o"
SHEET = "Few Shot 3a"


# %%
# Send each pending tweet to the chat model, with the few-shot prompt placed
# in front of it, and collect one response string per tweet (same order as
# to_classify).
responses = []
for text in tqdm(to_classify.text):
    messages = prompt + [{"role": "user", "content": text}]

    # The request itself is inside the try: a failed call for one tweet is
    # recorded as "Error" instead of aborting the run and discarding every
    # response collected so far. `except Exception` (rather than a bare
    # except) lets Ctrl-C / SystemExit still stop the loop.
    # NOTE: "Error" rows are saved with the other results, so later runs
    # treat those tweets as already classified unless the rows are removed.
    try:
        response = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=0.00,
        )
        cleaned_response = response.choices[0].message.content
    except Exception as error:
        cleaned_response = "Error"
        print(f"Error: {error!r}")
    responses.append(cleaned_response)

# to_classify is a filtered selection of df; assign() builds a new frame
# instead of writing into that selection, which avoids pandas'
# SettingWithCopyWarning.
to_classify = to_classify.assign(response=responses)

# %%
# Stack the newly classified tweets under the earlier results and overwrite
# the results workbook (the row index is not written).
output_path = start.MAIN_DIR + f"data/clean/gpt_classifications_characters.xlsx"
df = pd.concat([previous_classifications, to_classify])
df.to_excel(output_path, index=False)

# %%
